{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 基尼系数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = [1, 1, 1, 2, 2, 2, 3, 3, 4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Counter({1: 3, 2: 3, 3: 2, 4: 1})"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from collections import Counter\n",
    "counter = Counter(y)\n",
    "counter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_values([3, 3, 2, 1])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "counter.values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "9"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gini(y):\n",
    "    counter = Counter(y)\n",
    "    result = 0\n",
    "    for v in counter.values():\n",
    "        result += (v / len(y))**2\n",
    "    return 1 - result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7160493827160495"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gini(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.2777777777777777"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gini([1, 1, 1, 1, 1, 2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = np.array([[5, 5],\n",
    "             [4, 7],\n",
    "             [2, 5],\n",
    "             [1, 3],\n",
    "             [3, 4]])\n",
    "y = np.array([0, 0, 0, 1, 1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAE2VJREFUeJzt3W+MXXd95/H3J7ELOGUdiUzBimNPpSKkhUAIV9lEIERjgQh/nAdkpazMQlDRKNAWqj5AsJZYJZIf9Mk2pUiJpmGrUIYS6iWsk00iIoJE+yBB14mTAMmuvNk4iZWuh1CcpmYDhu8+uCdlfJnx3BnfmTv+8X5JV/ec3/n5/L7+ee7nnjn3XJ9UFZKktpwz6QIkSeNnuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIatGlSA19wwQU1PT09qeEl6ax08ODBH1XV1HL9Jhbu09PT9Pv9SQ0vSWelJEdG6edpGUlqkOEuSQ0y3CWpQYa7JDXIcJekBi0b7knekOTQgscLSf5kqE+SfCHJ4SSPJrl07UqWJC1n2XCvqv9ZVZdU1SXA24ATwB1D3a4CXt89ZoCbx12opA1obg6mp+GccwbPc3OTrkidlV7nvgv431U1fJ3l1cCXa3DPvgeSnJ9kW1U9N5YqJW08c3MwMwMnTgzWjxwZrAPs2TO5ugSs/Jz7tcDfLtJ+IfDMgvVnuzZJrdq791fB/rITJwbtmriRwz3JbwG7gb9b7WBJZpL0k/Tn5+dXuxtJG8HTT6+sXetqJUfuVwEPVdX/XWTbUeCiBevbu7ZTVNVsVfWqqjc1tex/jSBpI9uxY2XtWlcrCff/wOKnZAAOAB/prpq5HDju+Xapcfv2wZYtp7Zt2TJo18SNFO5JzgPeDXxjQdv1Sa7vVu8GngQOA38FfHLMdUraaPbsgdlZ2LkTksHz7Kwfpm4QGVzgsv56vV75v0JK0sokOVhVveX6+Q1VSWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDRr0T0/lJ9id5IsnjSa4Y2v6uJMeTHOoen1+bciVJo9g0Yr+/AO6tqmuS/BawZZE+f19VHxhfaZKk1Vo23JNsBd4JXAdQVT8Dfra2ZUmSzsQop2V+F5gH/jrJw0lu7W6YPeyKJI8kuSfJGxfbUZKZJP0k/fn5+TOpW5J0GqOE+ybgUuDmqnor8C/AZ4f6PATsrKq3AH8JfHOxHVXVbFX1qqo3NTV1BmVLkk5nlHB/Fni2qh7s1vczCPt/VVUvVNWL3fLdwOYkF4y1UknSyJYN96r6R+CZJG/omnYBP1zYJ8nrkqRbvqzb7/NjrlWSNKJRr5b5Y2Cuu1LmSeBjSa4HqKpbgGuATyQ5CfwUuLaqai0KliQtL5PK4F6vV/1+fyJjS9LZKsnBquot189vqEpSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSgkcI9yflJ9id5IsnjSa4Y2p4kX0hyOMmjSS5dal+SpLU36p2Y/gK4t6qu6e7GtGVo+1XA67vHvwNu7p4lSROw7JF7kq3AO4EvAVTVz6rqJ0Pdrga+XAMPAOcn2Tb2aiVJIxnltMzvAvPAXyd5OMmtSc4b6nMh8MyC9We7NknSBIwS7puAS4Gbq+qtwL8An13NYElmkvST9Ofn51ezC0nSCEYJ92eBZ6vqwW59P4OwX+gocNGC9e1d2ymqaraqelXVm5qaWk29kqQRLBvuVfWPwDNJ3tA17QJ+ONTtAPCR7qqZy4HjVfXceEuVJI1q1Ktl/hiY666UeRL4WJLrAarqFuBu4H3AYeAE8LE1qFWSNKKRwr2qDgG9oeZbFmwv4A/HWJck6Qz4DVVJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoNGullHkqeAfwZ+AZysqt7Q9ncB/x34P13TN6rqxvGVKUlaiVFvswfw+1X1o9Ns//uq+sCZFiRJOnOelpGkBo0a7gV8K8nBJDNL9LkiySNJ7knyxsU6JJlJ0k/Sn5+fX1XBkqTljXpa5h1VdTTJ7wD3JXmiqr67YPtDwM6qejHJ+4BvAq8f3klVzQKzAL1er86wdknSEkY6cq+qo93zMeAO4LKh7S9U1Yvd8t3A5iQXjLlWSdKIlg33JOclefXLy8B7gO8P9XldknTLl3X7fX785UqSRjHKaZnXAnd02b0J+GpV3ZvkeoCqugW4BvhEkpPAT4Frq8rTLpI0IcuGe1U9CbxlkfZbFix/EfjieEuTJK2Wl0JKUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoJHCPclTSR5LcihJf5HtSfKFJIeTPJrk0vGXKkka1UqO3H+/qi6pqt4i265icEPs1wMzwM3jKE5jMjcH09NwzjmD57m5SVck/eZZ59fhKLfZG8XVwJe7W+s9kOT8JNuq6rkx7V+rNTcHMzNw4sRg/ciRwTrAnj2Tq0v6TTKB1+GoR+4FfCvJwSQzi2y/EHhmwfqzXZsmbe/eX/1AvezEiUG7pPUxgdfhqEfu76iqo0l+B7gvyRNV9d2VDta9McwA7NixY6V/XKvx9NMra5c0fhN4HY505F5VR7vnY8AdwGVDXY4CFy1Y3961De9ntqp6VdWbmppaXcVamaXeRH1zldbPBF6Hy4Z7kvOSvPrlZeA9wPeHuh0APtJdNXM5cNzz7RvEvn2wZcupbVu2DNolrY8JvA5HOXJ/LfAPSR4Bvgf8j6q6N8n1Sa7v+twNPAkcBv4K+OSaVKuV27MHZmdh505IBs+zs36YKq2nCbwOM7jAZf31er3q93/tknlJ0mkkObjEJemn8BuqktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNWjkcE9ybpKHk9y1yLbrkswnOdQ9Pj7eMiVJKzHqDbIBPg08DvybJbbfXlV/dOYlSZLO1EhH7km2A+8Hbl3bciRJ4zDqaZmbgM8AvzxNnw8leTTJ/iQXnXlpkqTVWjbck3wAOFZVB0/T7U5guqreDNwH3LbEvmaS9JP05+fnV1WwJGl5oxy5vx3YneQp4GvAlUm+srBDVT1fVS91q7cCb1tsR1U1W1W9qupNTU2dQdmSpNNZNtyr6nNVtb2qpoFrgfur6sML+yTZtmB1N4MPXiVJE7KSq2VOkeRGoF9VB4BPJdkNnAR+DFw3nvIkSauRqprIwL1er/r9/kTGlqSzVZKDVdVbrp/fUJWkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNWjkcE9ybpKHk9y1yLZXJLk9yeEkDyaZHmeRkqSVWcmR+6dZ+t6ofwD8U1X9HvDnwJ+daWHSJMw9Nsf0TdOcc8M5TN80zdxjc5MuSVqVkcI9yXbg/cCtS3S5GritW94P7EqSMy9PWj9zj80xc+cMR44foSiOHD/CzJ0zBrzOSqMeud8EfAb45RLbLwSeAaiqk8Bx4DVnXJ20jvZ+ey8nfn7ilLYTPz/B3m/vnVBF0uotG+5JPgAcq6qDZzpYkpkk/ST9+fn5M92dNFZPH396Re3SRjbKkfvbgd1JngK+BlyZ5CtDfY4CFwEk2QRsBZ4f3lFVzVZVr6p6U1NTZ1S4NG47tu5YUbu0kS0b7lX1uaraXlXTwLXA/VX14aFuB4CPdsvXdH1qrJVKa2zfrn1s2bzllLYtm7ewb9e+CVUkrd6qr3NPcmOS3d3ql4DXJDkM/Cnw2XEUJ62nPRfvYfaDs+zcupMQdm7dyewHZ9lz8Z5JlyatWCZ1gN3r9arf709kbEk6WyU5WFW95fr5DVVJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaNMo9VF+Z5HtJHknygyQ3LNLnuiTzSQ51j4+vTbmSpFFsGqHPS8CVVfViks3APyS5p6oeGOp3e1X90fhLlCSt1LLh3t0L9cVudXP38P6okrSBjXTOPcm5SQ4Bx4D7qurBRbp9KMmjSfYnuWisVUqSVmSkcK+qX1TVJcB24LIkbxrqcicwXVVvBu4DbltsP0lmkvST9Ofn58+kbknSaazoapmq+gnwHeC9Q+3PV9VL3eqtwNuW+POzVdWrqt7U1NRq6pUkjWCUq2WmkpzfLb8KeDfwxFCfbQtWdwOPj7NISdLKjHK1zDbgtiTnMngz+HpV3ZXkRqBfVQeATyXZDZwEfgxct1YFS5KWl8HFMOuv1+tVv9+fyNiSdLZKcrCqesv18xuqktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGjXKbvVcm+V6SR5L8IMkNi/R5RZLbkxxO8mCS6bUodqG5x+aYvmmac244h+mbppl7bG6th5Sks8YoR+4vAVdW1VuAS4D3Jrl8qM8fAP9UVb8H/DnwZ+Mt81Rzj80xc+cMR44foSiOHD/CzJ0zBrwkdZYN9xp4sVvd3D2G7813NXBbt7wf2JUkY6tyyN5v7+XEz0+c0nbi5yfY++29azWkJJ1VRjrnnuTcJIeAY8B9VfXgUJcLgWcAquokcBx4zSL7mUnST9Kfn59fddFPH396Re2S9JtmpHCvql9U1SXAduCyJG9azWBVNVtVvarqTU1NrWYXAOzYumNF7ZL0m2ZFV8tU1U+A7wDvHdp0FLgIIMkmYCvw/DgKXMy+XfvYsnnLKW1bNm9h3659azWkJJ1VRrlaZirJ+d3yq4B3A08MdTsAfLRbvga4v6qGz8uPzZ6L9zD7wVl2bt1JCDu37mT2g7PsuXjPWg0pSWeVTSP02QbcluRcBm8GX6+qu5LcCPSr6gDwJeBvkhwGfgxcu2YVd/ZcvMcwl6QlLBvuVfUo8NZF2j+/YPn/Af9+vKVJklbLb6hKUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBmUNv2t0+oGTeeDIGHZ1AfCjMexnnDZiTWBdK7ERawLrWomNWBOceV07q2rZ/79lYuE+Lkn6VdWbdB0LbcSawLpWYiPWBNa1EhuxJli/ujwtI0kNMtwlqUEthPvspAtYxEasCaxrJTZiTWBdK7ERa4J1quusP+cuSfp1LRy5S5KGnBXhnuS/JjmW5PtLbE+SLyQ5nOTRJJdugJreleR4kkPd4/OL9VuDui5K8p0kP0zygySfXqTPus7XiDWt+3wleWWS7yV5pKvrhkX6vCLJ7d1cPZhkeoPUdV2S+QXz9fG1rqsb99wkDye5a5Ft6z5XI9Y1qbl6Kslj3Zj9Rbav7euwqjb8A3gncCnw/SW2vw+4BwhwOfDgBqjpXcBdE5irbcCl3fKrgf8F/NtJzteINa37fHV//9/uljcDDwKXD/X5JHBLt3wtcPsGqes64IsT+Pn6U+Cri/1bTWKuRqxrUnP1FHDBabav6evwrDhyr6rvMrgJyFKuBr5cAw8A5yfZNuGaJqKqnquqh7rlfwYeZ3AD84XWdb5GrGnddX//F7vVzd1j+EOoq4HbuuX9wK4k2QB1rbsk24H3A7cu0WXd52rEujaqNX0dnhXhPoILgWcWrD/LBggP4IruV+t7krxxvQfvfi1+K4Mjv4UmNl+nqQkmMF/dr/OHgGPAfVW15FxV1UngOPCaDVAXwIe6X+f3J7lorWsCbgI+A/xyie0TmasR6oL1nysYvCF/K8nBJDOLbF/T12Er4b4RPcTga8JvAf4S+OZ6Dp7kt4H/BvxJVb2wnmMvZZmaJjJfVfWLqroE2A5cluRN6zHuckao605guqreDNzHr46Y10SSDwDHqurgWo6zUiPWta5ztcA7qupS4CrgD5O8c53GBdoJ96PAwnfj7V3bxFTVCy//al1VdwObk1ywHmMn2cwgROeq6huLdFn3+VqupknOVzfmT4DvAO8d2vSvc5VkE7AVeH7SdVXV81X1Urd6K/C2NS7l7cDuJE8BXwOuTPKVoT6TmKtl65rAXL087tHu+RhwB3DZUJc1fR22Eu4HgI90nz5fDhyvqucmWVCS1718vjHJZQzmes1DoRvzS8DjVfVflui2rvM1Sk2TmK8kU0nO75ZfBbwbeGKo2wHgo93yNcD91X0aNsm6hs7N7mbwOcaaqarPVdX2qppm8GHp/VX14aFu6z5Xo9S13nPVjXlekle/vAy8Bxi+sm5NX4fL3iB7I0jytwyuprggybPAf2bwIRNVdQtwN4NPng8DJ4CPbYCargE+keQk8FPg2rX+Qe+8HfiPwGPdOVuA/wTsWFDbes/XKDVNYr62AbclOZfBm8nXq+quJDcC/ao6wOBN6W+SHGbwAfq1a1zTqHV9Kslu4GRX13XrUNev2QBzNUpdk5ir1wJ3dMcrm4CvVtW9Sa6H9Xkd+g1VSWpQK6dlJEkLGO6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXo/wNe3QAoEfRfhwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(X[y == 0, 0], X[y == 0, 1], color='r')\n",
    "plt.scatter(X[y == 1, 0], X[y == 1, 1], color='g')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cut(X, y, d, v):\n",
    "    left_index = (X[:, d] <= v)\n",
    "    right_index = (X[:, d] > v)\n",
    "    return X[left_index], X[right_index], y[left_index], y[right_index]\n",
    "\n",
    "def try_split(X, y):\n",
    "    best_g = 1\n",
    "    best_d = -1\n",
    "    best_v = -1\n",
    "    \n",
    "    for d in range(X.shape[1]):\n",
    "        sorted_index = np.argsort(X[:, d])\n",
    "        for i in range(len(X)-1):\n",
    "            if X[sorted_index[i], d] == X[sorted_index[i + 1], d]:\n",
    "                continue\n",
    "            \n",
    "            v = (X[sorted_index[i], d] + X[sorted_index[i + 1], d]) / 2\n",
    "            # print('d={}, v={}'.format(d, v))\n",
    "            X_left, X_right, y_left, y_right = cut(X, y, d, v)\n",
    "            \n",
    "            g_all = gini(y_left) + gini(y_right)\n",
    "            \n",
    "            print('d={}, v={}, g={}'.format(d, v, g_all))\n",
    "            \n",
    "            if g_all < best_g:\n",
    "                best_g = g_all\n",
    "                best_d = d\n",
    "                best_v = v\n",
    "    return best_d, best_v, best_g"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "d=0, v=1.5, g=0.375\n",
      "d=0, v=2.5, g=0.9444444444444444\n",
      "d=0, v=3.5, g=0.4444444444444444\n",
      "d=0, v=4.5, g=0.5\n",
      "d=1, v=3.5, g=0.375\n",
      "d=1, v=4.5, g=0.0\n",
      "d=1, v=6.0, g=0.5\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(1, 4.5, 0.0)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "try_split(X, y)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
